// A framework for simple tokenizers. Takes care of newlines and
// white-space, and of getting the text from the source stream into
// the token object. A state is a function of two arguments -- a
// string stream and a setState function. The second can be used to
// change the tokenizer's state, and can be ignored for stateless
// tokenizers. This function should advance the stream over a token
// and return a string or object containing information about the next
// token, or null to pass and have the (new) state be called to finish
// the token. When a string is given, it is wrapped in a {style, type}
// object. In the resulting object, the characters consumed are stored
// under the content property. Any whitespace following them is also
// automatically consumed, and added to the value property. (Thus,
// content is the actual meaningful part of the token, while value
// contains all the text it spans.)

// Build a tokenizer object over the string stream `source`, starting
// in the state function `state`. The result exposes `state` (the
// current state function), `take` (turns consumed text into a token)
// and `next` (produces the next token, or throws StopIteration when
// the stream has nothing more to offer).
function tokenizer(source, state) {
  // \u00a0 is listed explicitly because IE's regexp engine does not
  // count non-breaking spaces under \s.
  var blankPattern = /^[\s\u00a0]*$/;
  var trailingNewline = /\n$/;

  // A newline never counts as whitespace here, since it always has to
  // end up in a token of its own.
  function isWhiteSpace(ch) {
    if (ch == "\n") return false;
    return blankPattern.test(ch);
  }

  // Handed to state functions so they can install a successor state.
  function setState(newState) {
    instance.state = newState;
  }

  var instance = {
    state: state,

    // Finish a token. A string argument is expanded to a
    // {style, type} object; an object argument is filled in in place.
    // The text consumed so far is appended to any existing content,
    // trailing whitespace is swallowed (unless the content ends in a
    // newline), and value gets the content plus that whitespace.
    take: function(type) {
      var token = typeof type === "string" ? {style: type, type: type} : type;

      var prefix = token.content || "";
      token.content = prefix + source.get();

      var endsLine = trailingNewline.test(token.content);
      if (!endsLine) {
        source.nextWhile(isWhiteSpace);
      }

      token.value = token.content + source.get();
      return token;
    },

    // Produce the next token from the stream.
    next: function() {
      if (!source.more()) throw StopIteration;

      // A newline is consumed on its own and reported as whitespace.
      if (source.equals("\n")) {
        source.next();
        return this.take("whitespace");
      }

      // Leading whitespace is not consumed here; take() picks it up
      // as the trailing whitespace of an empty-content token.
      if (source.applies(isWhiteSpace)) {
        return this.take("whitespace");
      }

      // Keep asking the (possibly replaced) state function until it
      // yields a truthy token description.
      var type;
      do {
        type = this.state(source, setState);
      } while (!type);

      return this.take(type);
    }
  };

  return instance;
}
